INTELIGENCIA ARTIFICIAL¶
CIENCIAS DE LA COMPUTACIÓN¶
Desarrollo y Despliegue de Aplicaciones IA, ML, y DL¶
Integrantes:
- Diego Carrión
- Eduardo Arce
- Pablo Bravo -> Project Manager
Docente:
- Ing. Remigio Hurtado, PhD
Link GitHub: Aplicación MultiLabel
In [ ]:
# Suppress all outputs except final confirmation messages
import os

def _run(cmd):
    """Run a shell command via os.system; warn on a non-zero exit status.

    The original script ignored exit statuses entirely, so "done" messages
    were printed even when an install or download had failed.
    """
    status = os.system(cmd)
    if status != 0:
        print(f"WARNING: command failed (exit status {status}): {cmd}")

# Install required packages (output suppressed)
_run('pip install tensorflow==2.10 > /dev/null 2>&1')
_run('pip install tensorflow-addons==0.18.0 > /dev/null 2>&1')
_run('pip install tensorflow-directml-plugin > /dev/null 2>&1')
print("Installing done")
# Download annotations for the MS COCO dataset
_run('wget -q http://images.cocodataset.org/annotations/annotations_trainval2017.zip')
_run('unzip -qq annotations_trainval2017.zip')
print("Annotations downloading done")
# Download the MS COCO dataset
# NOTE(review): --no-check-certificate disables TLS verification — confirm it
# is actually needed for this host before keeping it.
_run('wget -q --no-check-certificate https://images.cocodataset.org/zips/train2017.zip')
_run('unzip -qq train2017.zip')
print("Dataset downloading done")
Installing done Annotations downloading done Dataset downloading done
In [ ]:
from pycocotools.coco import COCO
from PIL import Image
import os
import random
import matplotlib.pyplot as plt
from collections import Counter
# Path to the annotation file and the image directory (Colab paths)
annotation_file = "/content/annotations/instances_train2017.json"
image_dir = "/content/train2017"
# Load the COCO dataset (builds an in-memory index of images/annotations)
coco = COCO(annotation_file)
# Display a single COCO image with its dimensions and category labels.
def visualize_image(coco, image_dir, image_id):
    """Show one dataset image; the title carries its size and label names."""
    # Fetch image metadata and the names of all annotated categories
    meta = coco.loadImgs(image_id)[0]
    anns = coco.loadAnns(coco.getAnnIds(imgIds=image_id))
    names = [coco.loadCats(a['category_id'])[0]['name'] for a in anns]
    # Open the file and render it without axes
    picture = Image.open(os.path.join(image_dir, meta['file_name']))
    plt.figure(figsize=(8, 6))
    plt.imshow(picture)
    plt.axis('off')
    plt.title(f"Dimensions: {meta['width']}x{meta['height']}\nLabels: {', '.join(names)}")
    plt.show()
# Display a random 2x2 grid of dataset images with their labels.
def visualize_multiple_images(coco, image_dir, num_images=4):
    """Show `num_images` randomly sampled images in a 2x2 subplot grid."""
    chosen = random.sample(list(coco.imgs.keys()), num_images)
    plt.figure(figsize=(15, 10))
    for pos, img_id in enumerate(chosen):
        # Metadata and category names for this image
        meta = coco.loadImgs(img_id)[0]
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        names = [coco.loadCats(a['category_id'])[0]['name'] for a in anns]
        # Render into the next grid cell
        picture = Image.open(os.path.join(image_dir, meta['file_name']))
        plt.subplot(2, 2, pos + 1)
        plt.imshow(picture)
        plt.axis('off')
        plt.title(f"Labels: {', '.join(names)}")
    # NOTE(review): the exported source lost its indentation; layout/show are
    # assumed to run once after the loop — confirm against the notebook.
    plt.tight_layout()
    plt.show()
# Bar chart of how often each category label appears across all annotations.
def plot_label_frequency(coco):
    """Plot the frequency of every category label in the loaded COCO split."""
    anns = coco.loadAnns(coco.getAnnIds())
    freq = Counter(coco.loadCats(a['category_id'])[0]['name'] for a in anns)
    names, counts = zip(*freq.items())
    plt.figure(figsize=(20, 8))
    plt.bar(names, counts, color='skyblue')
    plt.xticks(rotation=90)
    plt.xlabel("Labels")
    plt.ylabel("Frequency")
    plt.title("Frequency of Labels in MS COCO Dataset")
    plt.show()
# Example usage
# Show one randomly chosen image
visualize_image(coco, image_dir, random.choice(list(coco.imgs.keys())))
# Show a 2x2 grid of random images
visualize_multiple_images(coco, image_dir, num_images=4)
# Plot how often each label occurs
plot_label_frequency(coco)
loading annotations into memory... Done (t=15.43s) creating index... index created!
In [ ]:
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow as tf
# Path configuration for annotations and images (Colab paths)
ANNOTATIONS_FILE = "/content/annotations/instances_train2017.json"
IMAGES_DIR = "/content/train2017"
# Load the full annotation set
coco = COCO(ANNOTATIONS_FILE)
# Map COCO category IDs (which are non-contiguous) to consecutive indices
# 0..num_classes-1, in the order returned by loadCats
categories = coco.loadCats(coco.getCatIds())
category_id_to_index = {cat['id']: idx for idx, cat in enumerate(categories)}
num_classes = len(category_id_to_index)
# Turn one COCO image id into a normalized image array plus a multi-hot label vector.
def preprocess_image_and_label(img_id, coco, img_dir, img_size=(256, 256)):
    """Load, resize and scale an image to [0, 1]; build its multi-hot labels.

    Returns (image, labels) on success, or (None, None) if anything fails
    (missing file, corrupt image, ...), so callers can skip the sample.
    """
    try:
        meta = coco.loadImgs(img_id)[0]
        path = os.path.join(img_dir, meta['file_name'])
        # Image: force RGB (some files are grayscale), resize, scale to [0, 1]
        pixels = np.array(Image.open(path).convert('RGB').resize(img_size)) / 255.0
        # Labels: one slot per category, set to 1 for each annotated category
        target = np.zeros(num_classes)
        for ann in coco.loadAnns(coco.getAnnIds(imgIds=img_id)):
            slot = category_id_to_index.get(ann['category_id'])
            if slot is not None:
                target[slot] = 1
        return pixels, target
    except Exception as e:
        # Best-effort pipeline: report and let the caller drop the sample
        print(f"Error procesando imagen {img_id}: {e}")
        return None, None
# Random augmentation applied on-the-fly to each (image, label) pair.
def augment_image(image, label):
    """Apply random flip/brightness/contrast jitter; labels pass through unchanged."""
    jittered = tf.image.random_flip_left_right(image)
    jittered = tf.image.random_brightness(jittered, max_delta=0.1)
    jittered = tf.image.random_contrast(jittered, lower=0.8, upper=1.2)
    return jittered, label
# Data generator that lazily loads/decodes images batch by batch.
def create_batched_tf_dataset(coco, img_dir, batch_size=32, img_size=(256, 256), augment=False, batch_limit=None):
    """Create a batched tf.data.Dataset from COCO annotations.

    Parameters:
    - coco: loaded pycocotools COCO object.
    - img_dir: directory containing the image files.
    - batch_size: samples per batch.
    - img_size: (width, height) each image is resized to.
    - augment: if True, apply random flip/brightness/contrast per sample.
    - batch_limit: if set, stop after this many batches of ids (caps dataset size).

    Returns a batched, prefetched tf.data.Dataset of (image, multi-hot labels).
    """
    all_image_ids = list(coco.getImgIds())
    # Ceiling division: the original `len // batch_size + 1` produced a
    # spurious empty trailing batch whenever len was an exact multiple.
    total_batches = -(-len(all_image_ids) // batch_size)

    def generator():
        for batch_idx in range(total_batches):
            if batch_limit and batch_idx >= batch_limit:  # cap loaded batches
                break
            start_idx = batch_idx * batch_size
            for img_id in all_image_ids[start_idx:start_idx + batch_size]:
                try:
                    # Process each image and its labels; skip failed samples
                    img, labels = preprocess_image_and_label(img_id, coco, img_dir, img_size)
                    if img is not None and labels is not None:
                        yield img, labels
                except Exception as e:
                    print(f"Error procesando imagen {img_id}: {e}")
                    continue

    # Derive the spec from img_size instead of hard-coding (256, 256, 3):
    # PIL sizes are (W, H) while numpy arrays are (H, W, C).
    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=(
            tf.TensorSpec(shape=(img_size[1], img_size[0], 3), dtype=tf.float32),
            tf.TensorSpec(shape=(num_classes,), dtype=tf.float32),
        ),
    )
    # Apply per-sample augmentation before batching
    if augment:
        dataset = dataset.map(augment_image, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
# Build the (lazily evaluated) training/validation datasets.
# NOTE(review): both datasets draw from the same train2017 ids, so this
# "validation" set overlaps the training data — consider a disjoint split.
train_dataset = create_batched_tf_dataset(coco, IMAGES_DIR, batch_size=16, augment=True, batch_limit=500)
val_dataset = create_batched_tf_dataset(coco, IMAGES_DIR, batch_size=16, augment=False, batch_limit=100)
# Base model: ImageNet-pretrained Xception, frozen for the first phase
base_model = Xception(weights='imagenet', include_top=False, input_shape=(256, 256, 3))
base_model.trainable = False
# Classification head: pooled features -> dense -> dropout -> per-class sigmoid
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.4)(x)
# Sigmoid (not softmax): labels are independent in a multi-label problem
output = Dense(num_classes, activation='sigmoid', dtype='float32')(x)
model = Model(inputs=base_model.input, outputs=output)
# Compile with per-label binary cross-entropy.
# NOTE(review): 'accuracy' resolves to binary accuracy over all 80 labels,
# which is inflated for sparse multi-hot targets; AUC/F1 would be more telling.
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
# Callbacks: keep the best checkpoint and stop early when val_loss stalls
checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss', mode='min')
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Phase 1: train the head only (base frozen).
# NOTE(review): despite the original comment mentioning "Class Weighting",
# no class_weight argument is passed — class weighting is NOT applied here.
history = model.fit(
train_dataset,
validation_data=val_dataset,
epochs=14,
callbacks=[checkpoint, early_stop],
verbose=1
)
# Phase 2: fine-tune the whole network at a lower learning rate
base_model.trainable = True
model.compile(optimizer=RMSprop(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
history_fine = model.fit(
train_dataset,
validation_data=val_dataset,
epochs=20,
callbacks=[checkpoint, early_stop],
verbose=1
)
# Final evaluation on the validation dataset
loss, accuracy = model.evaluate(val_dataset, verbose=1)
print(f"Pérdida en validación: {loss}")
print(f"Precisión en validación: {accuracy}")
loading annotations into memory... Done (t=16.25s) creating index... index created! Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5 83683744/83683744 [==============================] - 5s 0us/step Epoch 1/14 500/500 [==============================] - 413s 819ms/step - loss: 0.1176 - accuracy: 0.5141 - val_loss: 0.0739 - val_accuracy: 0.5950 Epoch 2/14 500/500 [==============================] - 408s 817ms/step - loss: 0.0797 - accuracy: 0.5785 - val_loss: 0.0644 - val_accuracy: 0.6231 Epoch 3/14 500/500 [==============================] - 406s 812ms/step - loss: 0.0720 - accuracy: 0.5882 - val_loss: 0.0601 - val_accuracy: 0.6225 Epoch 4/14 500/500 [==============================] - 407s 814ms/step - loss: 0.0679 - accuracy: 0.5888 - val_loss: 0.0577 - val_accuracy: 0.6400 Epoch 5/14 500/500 [==============================] - 407s 815ms/step - loss: 0.0648 - accuracy: 0.5966 - val_loss: 0.0542 - val_accuracy: 0.6319 Epoch 6/14 500/500 [==============================] - 406s 812ms/step - loss: 0.0620 - accuracy: 0.5936 - val_loss: 0.0535 - val_accuracy: 0.6594 Epoch 7/14 500/500 [==============================] - 406s 812ms/step - loss: 0.0599 - accuracy: 0.6021 - val_loss: 0.0497 - val_accuracy: 0.6375 Epoch 8/14 500/500 [==============================] - 405s 810ms/step - loss: 0.0574 - accuracy: 0.5960 - val_loss: 0.0483 - val_accuracy: 0.6369 Epoch 9/14 500/500 [==============================] - 406s 811ms/step - loss: 0.0554 - accuracy: 0.5924 - val_loss: 0.0449 - val_accuracy: 0.6294 Epoch 10/14 500/500 [==============================] - 406s 812ms/step - loss: 0.0532 - accuracy: 0.6019 - val_loss: 0.0445 - val_accuracy: 0.6381 Epoch 11/14 500/500 [==============================] - 405s 811ms/step - loss: 0.0517 - accuracy: 0.6055 - val_loss: 0.0432 - val_accuracy: 0.6438 Epoch 12/14 500/500 [==============================] - 405s 811ms/step - loss: 0.0496 - accuracy: 
0.5985 - val_loss: 0.0403 - val_accuracy: 0.6300 Epoch 13/14 500/500 [==============================] - 405s 811ms/step - loss: 0.0485 - accuracy: 0.6047 - val_loss: 0.0393 - val_accuracy: 0.6425 Epoch 14/14 500/500 [==============================] - 407s 814ms/step - loss: 0.0470 - accuracy: 0.6047 - val_loss: 0.0381 - val_accuracy: 0.6406 Epoch 1/20 500/500 [==============================] - 968s 2s/step - loss: 0.0828 - accuracy: 0.5836 - val_loss: 0.0497 - val_accuracy: 0.6706 Epoch 2/20 500/500 [==============================] - 956s 2s/step - loss: 0.0603 - accuracy: 0.6426 - val_loss: 0.0406 - val_accuracy: 0.6781 Epoch 3/20 500/500 [==============================] - 956s 2s/step - loss: 0.0492 - accuracy: 0.6684 - val_loss: 0.0369 - val_accuracy: 0.6875 Epoch 4/20 500/500 [==============================] - 956s 2s/step - loss: 0.0421 - accuracy: 0.6883 - val_loss: 0.0341 - val_accuracy: 0.6706 Epoch 5/20 500/500 [==============================] - 957s 2s/step - loss: 0.0372 - accuracy: 0.6966 - val_loss: 0.0334 - val_accuracy: 0.6875 Epoch 6/20 500/500 [==============================] - 956s 2s/step - loss: 0.0333 - accuracy: 0.6991 - val_loss: 0.0297 - val_accuracy: 0.6969 Epoch 7/20 500/500 [==============================] - 956s 2s/step - loss: 0.0302 - accuracy: 0.7094 - val_loss: 0.0280 - val_accuracy: 0.6869 Epoch 8/20 500/500 [==============================] - 954s 2s/step - loss: 0.0275 - accuracy: 0.7088 - val_loss: 0.0257 - val_accuracy: 0.6719 Epoch 9/20 500/500 [==============================] - 957s 2s/step - loss: 0.0254 - accuracy: 0.7020 - val_loss: 0.0242 - val_accuracy: 0.6706 Epoch 10/20 500/500 [==============================] - 955s 2s/step - loss: 0.0234 - accuracy: 0.7088 - val_loss: 0.0238 - val_accuracy: 0.6900 Epoch 11/20 500/500 [==============================] - 955s 2s/step - loss: 0.0220 - accuracy: 0.7066 - val_loss: 0.0217 - val_accuracy: 0.6869 Epoch 12/20 500/500 [==============================] - 955s 2s/step - loss: 
0.0206 - accuracy: 0.7029 - val_loss: 0.0209 - val_accuracy: 0.6900 Epoch 13/20 500/500 [==============================] - 955s 2s/step - loss: 0.0190 - accuracy: 0.6964 - val_loss: 0.0197 - val_accuracy: 0.6931 Epoch 14/20 500/500 [==============================] - 955s 2s/step - loss: 0.0182 - accuracy: 0.7107 - val_loss: 0.0184 - val_accuracy: 0.7156 Epoch 15/20 500/500 [==============================] - 955s 2s/step - loss: 0.0172 - accuracy: 0.7080 - val_loss: 0.0188 - val_accuracy: 0.7000 Epoch 16/20 500/500 [==============================] - 955s 2s/step - loss: 0.0165 - accuracy: 0.7025 - val_loss: 0.0180 - val_accuracy: 0.7075 Epoch 17/20 500/500 [==============================] - 954s 2s/step - loss: 0.0157 - accuracy: 0.7031 - val_loss: 0.0179 - val_accuracy: 0.7063 Epoch 18/20 500/500 [==============================] - 955s 2s/step - loss: 0.0150 - accuracy: 0.7001 - val_loss: 0.0174 - val_accuracy: 0.7006 Epoch 19/20 500/500 [==============================] - 955s 2s/step - loss: 0.0143 - accuracy: 0.7064 - val_loss: 0.0170 - val_accuracy: 0.7325 Epoch 20/20 500/500 [==============================] - 955s 2s/step - loss: 0.0139 - accuracy: 0.6998 - val_loss: 0.0159 - val_accuracy: 0.6963 100/100 [==============================] - 67s 672ms/step - loss: 0.0159 - accuracy: 0.6963 Pérdida en validación: 0.01589490845799446 Precisión en validación: 0.6962500214576721
In [ ]:
import os
import numpy as np
from tensorflow.keras.models import load_model
from PIL import Image
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
# COCO setup used to map prediction indices back to category names
ANNOTATIONS_FILE = "/content/annotations/instances_train2017.json"
coco = COCO(ANNOTATIONS_FILE)
categories = coco.loadCats(coco.getCatIds())
# Same id -> index mapping as at training time (order of `categories`)
category_id_to_index = {cat['id']: idx for idx, cat in enumerate(categories)}
# Run the trained multi-label classifier over a list of image files.
def predict_images(model, image_paths, threshold=0.5):
    """Predict category names for each image path.

    Parameters:
    - model: trained Keras model.
    - image_paths: list of image file paths.
    - threshold: minimum sigmoid score for a category to count as present.

    Returns:
    - dict mapping each image path to its list of predicted category names.
    """
    target_size = (256, 256)  # input size the model was trained with
    predictions_by_path = {}
    for path in image_paths:
        # Load -> RGB -> resize -> scale to [0, 1] -> add batch dimension
        rgb = Image.open(path).convert('RGB').resize(target_size)
        batch = np.expand_dims(np.array(rgb) / 255.0, axis=0)
        # Single-image batch: take the first (only) row of scores
        scores = model.predict(batch)[0]
        # Index i corresponds to categories[i] — same enumeration as training
        predictions_by_path[path] = [
            categories[i]['name'] for i, p in enumerate(scores) if p > threshold
        ]
    return predictions_by_path
# Load the best checkpoint written by ModelCheckpoint during training
model = load_model('/content/best_model.keras')
# Collect the paths of all supported image files directly inside a folder.
def get_image_paths_from_folder(folder_path):
    """Return paths of every .jpg/.jpeg/.png file in `folder_path` (non-recursive).

    Parameters:
    - folder_path: directory to scan.

    Returns:
    - list of full file paths; extension matching is case-insensitive.
    """
    supported_formats = {".jpg", ".jpeg", ".png"}
    found = []
    for entry in os.listdir(folder_path):
        candidate = os.path.join(folder_path, entry)
        _, ext = os.path.splitext(entry)
        # Keep regular files only (skips directories named like images)
        if os.path.isfile(candidate) and ext.lower() in supported_formats:
            found.append(candidate)
    return found
# Folder containing the images to classify (adjust to your environment)
folder_path = "/content/images_folder"
image_paths = get_image_paths_from_folder(folder_path)[:26]  # take up to 26 images
# Predict with a permissive threshold (0.2) to favour recall
predictions = predict_images(
    model,
    image_paths=image_paths,
    threshold=0.2
)
# Print and visualize each image with its predicted categories
for image_path, predicted_categories in predictions.items():
    # Fixed mojibake in the original message ("CategorÃas" -> "Categorías")
    print(f"Imagen: {image_path}\nCategorías predichas: {predicted_categories}\n")
    # Show the image with its predictions as the title
    img = Image.open(image_path).convert('RGB')
    plt.imshow(img)
    plt.title("Predicción: " + ", ".join(predicted_categories))
    plt.axis("off")
    plt.show()
loading annotations into memory... Done (t=16.53s) creating index... index created! 1/1 [==============================] - 1s 849ms/step 1/1 [==============================] - 0s 124ms/step 1/1 [==============================] - 0s 128ms/step 1/1 [==============================] - 0s 126ms/step 1/1 [==============================] - 0s 124ms/step 1/1 [==============================] - 0s 108ms/step 1/1 [==============================] - 0s 108ms/step 1/1 [==============================] - 0s 90ms/step 1/1 [==============================] - 0s 114ms/step 1/1 [==============================] - 0s 100ms/step 1/1 [==============================] - 0s 104ms/step 1/1 [==============================] - 0s 104ms/step 1/1 [==============================] - 0s 112ms/step 1/1 [==============================] - 0s 90ms/step 1/1 [==============================] - 0s 104ms/step 1/1 [==============================] - 0s 108ms/step 1/1 [==============================] - 0s 121ms/step 1/1 [==============================] - 0s 91ms/step 1/1 [==============================] - 0s 88ms/step 1/1 [==============================] - 0s 92ms/step 1/1 [==============================] - 0s 110ms/step 1/1 [==============================] - 0s 106ms/step 1/1 [==============================] - 0s 91ms/step Imagen: /content/images_folder/test012.jpg CategorÃas predichas: ['person', 'bicycle', 'motorcycle']
Imagen: /content/images_folder/test019.jpg CategorÃas predichas: ['boat', 'bird']
Imagen: /content/images_folder/test010.jpg CategorÃas predichas: ['bird']
Imagen: /content/images_folder/test009.jpg CategorÃas predichas: ['person', 'car', 'motorcycle', 'truck']
Imagen: /content/images_folder/test006.jpg CategorÃas predichas: ['bench']
Imagen: /content/images_folder/test004.jpg CategorÃas predichas: ['person', 'bicycle', 'bench', 'bird']
Imagen: /content/images_folder/cf0d9be8-434f-4a16-a402-8e13ec009d10.png CategorÃas predichas: ['dog']
Imagen: /content/images_folder/test005.png CategorÃas predichas: ['person', 'bench']
Imagen: /content/images_folder/test017.jpg CategorÃas predichas: []
Imagen: /content/images_folder/test018.jpg CategorÃas predichas: ['car', 'truck']
Imagen: /content/images_folder/test007.jpg CategorÃas predichas: ['bench']
Imagen: /content/images_folder/test020.jpg CategorÃas predichas: ['person', 'dog']
Imagen: /content/images_folder/test002.jpg CategorÃas predichas: ['person', 'sports ball']
Imagen: /content/images_folder/test_4.jpg CategorÃas predichas: ['car', 'truck']
Imagen: /content/images_folder/test001.jpg CategorÃas predichas: ['person', 'sports ball']
Imagen: /content/images_folder/test016.jpg CategorÃas predichas: []
Imagen: /content/images_folder/test003.jpg CategorÃas predichas: []
Imagen: /content/images_folder/test008.jpg CategorÃas predichas: ['person', 'car', 'truck', 'traffic light']
Imagen: /content/images_folder/test011.jpg CategorÃas predichas: ['stop sign']
Imagen: /content/images_folder/test_5.jpg CategorÃas predichas: []
Imagen: /content/images_folder/test013.jpg CategorÃas predichas: ['person', 'dog', 'sports ball']
Imagen: /content/images_folder/test014.jpg CategorÃas predichas: ['person', 'sports ball']
Imagen: /content/images_folder/test015.jpg CategorÃas predichas: ['person', 'bird']